********************************************************************************
** 	TITLE:		large_sample_example                                          ** 	
**	AUTHOR:	    Philippe Mongrain                                             **
**	DATA:       us2004_naes                                                   **
**	DATE:		October 2022 					                              **	
**	VERSION:	Stata 16	                                                  **
**  NOTE:       Based on Lin et al.	(2013), "Too Big to Fail" (ISR)           **	
********************************************************************************

* Version control

version 16.0

* Open log file

capture log close       			  			              
log using "large_sample_example", replace

* Set memory and matrices size

set mem 600m 

set matsize 5001
set more off

* Open the dataset

use "us2000_naes.dta", clear

* Specify the independent variable for the CPS plot

global m "discussion_3pts"

* Set the number of points to plot on the CPS plot

local z = 10000  

* Defining matrices to hold these values: b, p, t, se, n

matrix bvals = J(`z', 1, 0)
matrix pvals = J(`z', 1, 0)
matrix sevals = J(`z', 1, 0)
matrix tvals = J(`z', 1, 0)
matrix nvals = J(`z', 1, 0)

* Set seed to ensure replicability

set seed 1234
capture gen sampling = uniform()
local j = 0

forvalues i=0.0002(0.0002)1  {
	local j = `j' + 1
	disp `j'
	***  Define the model ***
	qui reg correct_whole_d discussion disagreement pidscale_whole interest_president news i.care_whole age i.male education time if sampling <= `i', robust
	matrix betas = e(b)
	matrix bvals[`j',1] = betas[1,1]
	matrix nvals[`j',1] = e(N)
	matrix vc = e(V)
	matrix sevals[`j',1] = sqrt(vc[1,1])
	qui testparm discussion
	matrix pvals[`j',1] = r(p)
* Divide the p-value by 2 for one-sided tests. Remove “/2” for two-sided tests
* Matrix pvals[`j',1] = r(p)/2
	matrix tvals[`j',1] = betas[1,1] / (sqrt(vc[1,1]))
}

drop _all

* Create a new dataset to include all the results from the iterations

svmat nvals
svmat bvals
svmat pvals
svmat sevals
svmat tvals

local parlist `n b p se t'
foreach z of local parlist {
	rename `z'vals1 `z'_discussion
}
save cps_discussion.dta, replace

* Generate a dual-panel CPS plot

scatter bvals1 n if n >= 100 & n <= 10000, title("(a) beta coefficients", size(medium) margin(b=3)) xtitle("Sample size", size(medsmall)) ytitle("Coefficients", size(medsmall) margin(r=2)) yline(0, lcolor(blue) lwidth(vthin)) scheme(s1mono) mcolor(black) msize(vsmall) msymbol(circle) xlabel(0(500)10000, nogrid labsize(small) angle(vertical)) ylabel(-0.02(0.02)0.12, nogrid labsize(small) angle(horizontal) format(%5.2f)) graphregion(color(white)) saving(coefficients.gph, replace)
scatter pvals n if n >= 100 & n <= 10000, title("(b) p-values", size(medium) margin(b=3)) xtitle("Sample size", size(medsmall)) ytitle("p-values", size(medsmall) margin(r=2)) scheme(s1mono) mcolor(black) msize(vsmall) msymbol(circle) xlabel(0(500)10000, nogrid labsize(small) angle(vertical)) ylabel(0(0.2)1, nogrid labsize(small) angle(horizontal) format(%5.2f)) yline(0.05, lcolor(red) lwidth(vthin)) graphregion(color(white)) saving(pvalues.gph, replace)
graph combine coefficients.gph pvalues.gph, col(1) graphregion(color(white)) saving(cps_discussion.gph, replace)

erase coefficients.gph
erase pvalues.gph

log close
set more on